In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import os
%matplotlib inline
In [2]:
spotify_data = pd.read_csv('C:\\Users\\RS\\data.csv')
genre_data = pd.read_csv('C:\\Users\\RS\\data_by_genres.csv')
data_by_year = pd.read_csv('C:\\Users\\RS\\data_by_year.csv')
In [3]:
spotify_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170653 entries, 0 to 170652
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   valence           170653 non-null  float64
 1   year              170653 non-null  int64  
 2   acousticness      170653 non-null  float64
 3   artists           170653 non-null  object 
 4   danceability      170653 non-null  float64
 5   duration_ms       170653 non-null  int64  
 6   energy            170653 non-null  float64
 7   explicit          170653 non-null  int64  
 8   id                170653 non-null  object 
 9   instrumentalness  170653 non-null  float64
 10  key               170653 non-null  int64  
 11  liveness          170653 non-null  float64
 12  loudness          170653 non-null  float64
 13  mode              170653 non-null  int64  
 14  name              170653 non-null  object 
 15  popularity        170653 non-null  int64  
 16  release_date      170653 non-null  object 
 17  speechiness       170653 non-null  float64
 18  tempo             170653 non-null  float64
dtypes: float64(9), int64(6), object(4)
memory usage: 24.7+ MB
In [4]:
genre_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2973 entries, 0 to 2972
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   mode              2973 non-null   int64  
 1   genres            2973 non-null   object 
 2   acousticness      2973 non-null   float64
 3   danceability      2973 non-null   float64
 4   duration_ms       2973 non-null   float64
 5   energy            2973 non-null   float64
 6   instrumentalness  2973 non-null   float64
 7   liveness          2973 non-null   float64
 8   loudness          2973 non-null   float64
 9   speechiness       2973 non-null   float64
 10  tempo             2973 non-null   float64
 11  valence           2973 non-null   float64
 12  popularity        2973 non-null   float64
 13  key               2973 non-null   int64  
dtypes: float64(11), int64(2), object(1)
memory usage: 325.3+ KB
In [5]:
data_by_year.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   mode              100 non-null    int64  
 1   year              100 non-null    int64  
 2   acousticness      100 non-null    float64
 3   danceability      100 non-null    float64
 4   duration_ms       100 non-null    float64
 5   energy            100 non-null    float64
 6   instrumentalness  100 non-null    float64
 7   liveness          100 non-null    float64
 8   loudness          100 non-null    float64
 9   speechiness       100 non-null    float64
 10  tempo             100 non-null    float64
 11  valence           100 non-null    float64
 12  popularity        100 non-null    float64
 13  key               100 non-null    int64  
dtypes: float64(11), int64(3)
memory usage: 11.1 KB

Exploratory Data Analysis¶

In [6]:
import plotly.express as px
In [7]:
sound_features = ['acousticness', 'danceability', 'energy',
                  'instrumentalness', 'liveness', 'valence']
fig = px.line(data_by_year, x='year', y=sound_features)
fig.show()
In [8]:
fig = px.line(data_by_year, x='year', y='tempo')
fig.show()
In [9]:
# Boxplot of danceability by genre
sns.boxplot(x='danceability', y='genres', data=genre_data)
plt.title("Danceability by Genre")
plt.show()
In [10]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# assume genre_data is a pandas DataFrame containing the relevant data

# calculate mean danceability for each genre
genre_danceability = genre_data.groupby('genres')['danceability'].mean()

# sort genres by danceability and select top 10
top_genres = genre_danceability.sort_values(ascending=False).head(10).index.tolist()

# filter the data for only the top 10 genres
genre_data_top10 = genre_data[genre_data['genres'].isin(top_genres)]

# create the histogram
sns.histplot(x='danceability', hue='genres', data=genre_data_top10, multiple='stack')
plt.title("Danceability by Top 10 Genres")
plt.show()
In [34]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# assume genre_data is a pandas DataFrame containing the relevant data

# calculate mean danceability for each genre
genre_danceability = genre_data.groupby('genres')['danceability'].mean()

# sort genres by danceability and select top 10
top_genres = genre_danceability.sort_values(ascending=False).head(10)

# create a color palette
palette = sns.color_palette("hls", 10)

# create the bar plot with a different color for each bar
sns.barplot(x=top_genres.values, y=top_genres.index, palette=palette)
plt.title("Top 10 Genres by Mean Danceability")
plt.xlabel("Mean Danceability")
plt.ylabel("Genre")
plt.show()

Characteristics of Differenct Genres¶

In [13]:
top_10_genres = genre_data.nlargest(10, 'popularity')
fig = px.bar(top_10_genres, x='genres', y=['valence', 'energy',
                                           'danceability', 'acousticness'],
            barmode='group')
fig.show()

Clustering Genres with K-Means¶

In [14]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                            ('kmeans', KMeans(n_clusters=10))])
X = genre_data.select_dtypes(np.number)
cluster_pipeline.fit(X)
genre_data['cluster'] = cluster_pipeline.predict(X)

Visualizing the Genre Clusters with t-SNE¶

In [15]:
from sklearn.manifold import TSNE
In [16]:
tsne_pipeline = Pipeline([('scaler', StandardScaler()),
                         ('tsne', TSNE(n_components=2, verbose=2))])
genre_embedding = tsne_pipeline.fit_transform(X)
projection = pd.DataFrame(columns=['x', 'y'], data=genre_embedding)
projection['genres'] = genre_data['genres']
projection['cluster'] = genre_data['cluster']
D:\G\ss\lib\site-packages\sklearn\manifold\_t_sne.py:780: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

D:\G\ss\lib\site-packages\sklearn\manifold\_t_sne.py:790: FutureWarning:

The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 2973 samples in 0.148s...
[t-SNE] Computed neighbors for 2973 samples in 0.373s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2973
[t-SNE] Computed conditional probabilities for sample 2000 / 2973
[t-SNE] Computed conditional probabilities for sample 2973 / 2973
[t-SNE] Mean sigma: 0.777516
[t-SNE] Computed conditional probabilities in 0.066s
[t-SNE] Iteration 50: error = 81.9775620, gradient norm = 0.0043106 (50 iterations in 0.903s)
[t-SNE] Iteration 100: error = 76.7715149, gradient norm = 0.0183697 (50 iterations in 0.759s)
[t-SNE] Iteration 150: error = 76.2398071, gradient norm = 0.0047412 (50 iterations in 0.603s)
[t-SNE] Iteration 200: error = 76.1542206, gradient norm = 0.0007833 (50 iterations in 0.941s)
[t-SNE] Iteration 250: error = 76.1253738, gradient norm = 0.0004384 (50 iterations in 0.986s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 76.125374
[t-SNE] Iteration 300: error = 1.8275498, gradient norm = 0.0011280 (50 iterations in 0.976s)
[t-SNE] Iteration 350: error = 1.5905074, gradient norm = 0.0003895 (50 iterations in 0.867s)
[t-SNE] Iteration 400: error = 1.5003856, gradient norm = 0.0002519 (50 iterations in 0.975s)
[t-SNE] Iteration 450: error = 1.4568652, gradient norm = 0.0001596 (50 iterations in 0.893s)
[t-SNE] Iteration 500: error = 1.4333408, gradient norm = 0.0001210 (50 iterations in 0.905s)
[t-SNE] Iteration 550: error = 1.4192455, gradient norm = 0.0000979 (50 iterations in 0.907s)
[t-SNE] Iteration 600: error = 1.4100113, gradient norm = 0.0001056 (50 iterations in 0.926s)
[t-SNE] Iteration 650: error = 1.4039299, gradient norm = 0.0000735 (50 iterations in 0.884s)
[t-SNE] Iteration 700: error = 1.3995304, gradient norm = 0.0000698 (50 iterations in 0.901s)
[t-SNE] Iteration 750: error = 1.3961930, gradient norm = 0.0000670 (50 iterations in 0.932s)
[t-SNE] Iteration 800: error = 1.3934026, gradient norm = 0.0000620 (50 iterations in 0.933s)
[t-SNE] Iteration 850: error = 1.3909042, gradient norm = 0.0000536 (50 iterations in 0.960s)
[t-SNE] Iteration 900: error = 1.3887153, gradient norm = 0.0000481 (50 iterations in 0.864s)
[t-SNE] Iteration 950: error = 1.3869228, gradient norm = 0.0000530 (50 iterations in 0.987s)
[t-SNE] Iteration 1000: error = 1.3850130, gradient norm = 0.0000471 (50 iterations in 0.870s)
[t-SNE] KL divergence after 1000 iterations: 1.385013
In [17]:
import plotly.express as px
fig = px.scatter(
    projection, x='x', y='y', color='cluster', hover_data=['x', 'y',
                                                           'genres']
)
fig.show()
In [18]:
genre_data
Out[18]:
mode genres acousticness danceability duration_ms energy instrumentalness liveness loudness speechiness tempo valence popularity key cluster
0 1 21st century classical 0.979333 0.162883 1.602977e+05 0.071317 0.606834 0.361600 -31.514333 0.040567 75.336500 0.103783 27.833333 6 2
1 1 432hz 0.494780 0.299333 1.048887e+06 0.450678 0.477762 0.131000 -16.854000 0.076817 120.285667 0.221750 52.500000 5 1
2 1 8-bit 0.762000 0.712000 1.151770e+05 0.818000 0.876000 0.126000 -9.180000 0.047000 133.444000 0.975000 48.000000 7 8
3 1 [] 0.651417 0.529093 2.328809e+05 0.419146 0.205309 0.218696 -12.288965 0.107872 112.857352 0.513604 20.859882 7 4
4 1 a cappella 0.676557 0.538961 1.906285e+05 0.316434 0.003003 0.172254 -12.479387 0.082851 112.110362 0.448249 45.820071 7 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2968 1 zolo 0.222625 0.547082 2.580991e+05 0.610240 0.143872 0.204206 -11.295878 0.061088 125.494919 0.596155 33.778943 9 6
2969 0 zouglou 0.161000 0.863000 2.063200e+05 0.909000 0.000000 0.108000 -5.985000 0.081300 119.038000 0.845000 58.000000 7 7
2970 1 zouk 0.263261 0.748889 3.060728e+05 0.622444 0.257227 0.089678 -10.289222 0.038778 101.965222 0.824111 46.666667 5 8
2971 0 zurich indie 0.993000 0.705667 1.984173e+05 0.172667 0.468633 0.179667 -11.453333 0.348667 91.278000 0.739000 0.000000 7 4
2972 1 zydeco 0.421038 0.629409 1.716717e+05 0.609369 0.019248 0.255877 -9.854825 0.050491 126.366087 0.808544 30.261905 7 8

2973 rows × 15 columns

Clustering songs with K-Means¶

In [19]:
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                                 ('kmeans', KMeans(n_clusters=20,
                                                   verbose=2))],
                                verbose=True)
X = spotify_data.select_dtypes(np.number)
number_cols = list(X.columns)
number_cols
song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
spotify_data['cluster_label'] = song_cluster_labels
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.1s
Initialization complete
Iteration 0, inertia 1518431.2107395953
Iteration 1, inertia 1176719.0925323036
Iteration 2, inertia 1121839.8004857716
Iteration 3, inertia 1102035.0666062639
Iteration 4, inertia 1089771.9407152093
Iteration 5, inertia 1080722.2994542557
Iteration 6, inertia 1073522.1132717908
Iteration 7, inertia 1068280.7770367519
Iteration 8, inertia 1064824.5245022902
Iteration 9, inertia 1062666.099524576
Iteration 10, inertia 1061272.7287166417
Iteration 11, inertia 1060331.2738080064
Iteration 12, inertia 1059604.737659868
Iteration 13, inertia 1059159.2481732247
Iteration 14, inertia 1058852.1393562001
Iteration 15, inertia 1058630.6139641325
Iteration 16, inertia 1058497.489310806
Iteration 17, inertia 1058411.9682589525
Iteration 18, inertia 1058333.4947391595
Iteration 19, inertia 1058276.7663930529
Iteration 20, inertia 1058208.5289122937
Iteration 21, inertia 1058113.8050155211
Iteration 22, inertia 1058080.1120315497
Iteration 23, inertia 1058061.295989202
Iteration 24, inertia 1058054.4060224914
Iteration 25, inertia 1058049.6635927763
Iteration 26, inertia 1058046.3037710977
Iteration 27, inertia 1058044.0468948237
Converged at iteration 27: center shift 9.90817964533811e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1559384.1776030476
Iteration 1, inertia 1195634.6516553042
Iteration 2, inertia 1158825.8920438814
Iteration 3, inertia 1138544.2063523107
Iteration 4, inertia 1121156.4330490923
Iteration 5, inertia 1106988.7620414924
Iteration 6, inertia 1095832.4532479884
Iteration 7, inertia 1088902.9679148134
Iteration 8, inertia 1084667.0004423996
Iteration 9, inertia 1081632.420884549
Iteration 10, inertia 1079509.896174779
Iteration 11, inertia 1077785.2641181888
Iteration 12, inertia 1076335.5046003303
Iteration 13, inertia 1074987.4209390075
Iteration 14, inertia 1073576.025524011
Iteration 15, inertia 1072047.5792999435
Iteration 16, inertia 1070304.8317476106
Iteration 17, inertia 1068423.0928194448
Iteration 18, inertia 1066725.9993725605
Iteration 19, inertia 1065539.5708809209
Iteration 20, inertia 1064767.6904959816
Iteration 21, inertia 1064277.904192029
Iteration 22, inertia 1063987.3453930086
Iteration 23, inertia 1063802.4312042985
Iteration 24, inertia 1063669.3306529338
Iteration 25, inertia 1063571.6131669658
Iteration 26, inertia 1063489.2908760826
Iteration 27, inertia 1063420.3420008158
Iteration 28, inertia 1063355.9751168643
Iteration 29, inertia 1063287.4510769665
Iteration 30, inertia 1063224.5580447044
Iteration 31, inertia 1063155.9786223928
Iteration 32, inertia 1063086.4996239473
Iteration 33, inertia 1063007.8302863322
Iteration 34, inertia 1062909.2626284184
Iteration 35, inertia 1062776.4580679566
Iteration 36, inertia 1062586.3701343553
Iteration 37, inertia 1062342.819166396
Iteration 38, inertia 1061965.2691590665
Iteration 39, inertia 1061374.971711272
Iteration 40, inertia 1060630.665688638
Iteration 41, inertia 1059912.0603499413
Iteration 42, inertia 1059377.2810996866
Iteration 43, inertia 1059071.3595787743
Iteration 44, inertia 1058885.8259530005
Iteration 45, inertia 1058770.2782726677
Iteration 46, inertia 1058685.9776619256
Iteration 47, inertia 1058626.6251569279
Iteration 48, inertia 1058573.0244780292
Iteration 49, inertia 1058516.8640162086
Iteration 50, inertia 1058455.5532407053
Iteration 51, inertia 1058385.8033518947
Iteration 52, inertia 1058305.6211516273
Iteration 53, inertia 1058195.5760436459
Iteration 54, inertia 1058034.0229115132
Iteration 55, inertia 1057821.3428421803
Iteration 56, inertia 1057561.734543161
Iteration 57, inertia 1057294.2388344912
Iteration 58, inertia 1057029.3986666582
Iteration 59, inertia 1056817.0081928251
Iteration 60, inertia 1056673.2028299319
Iteration 61, inertia 1056584.4836397655
Iteration 62, inertia 1056532.1476413126
Iteration 63, inertia 1056501.493979799
Iteration 64, inertia 1056484.615441137
Iteration 65, inertia 1056472.2155652544
Iteration 66, inertia 1056463.866086048
Iteration 67, inertia 1056458.4806436726
Iteration 68, inertia 1056454.8399684452
Iteration 69, inertia 1056451.6201729078
Iteration 70, inertia 1056448.7325498476
Iteration 71, inertia 1056445.8228834884
Iteration 72, inertia 1056442.7657642637
Iteration 73, inertia 1056439.2565826396
Iteration 74, inertia 1056434.9727333977
Iteration 75, inertia 1056431.9070099094
Iteration 76, inertia 1056428.8563668646
Iteration 77, inertia 1056426.0553972123
Iteration 78, inertia 1056424.040204331
Converged at iteration 78: center shift 8.294355896458048e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1481004.6992672693
Iteration 1, inertia 1151723.5111618347
Iteration 2, inertia 1118025.4533933988
Iteration 3, inertia 1106926.274026199
Iteration 4, inertia 1101438.367066885
Iteration 5, inertia 1098170.0501537628
Iteration 6, inertia 1096066.9778469112
Iteration 7, inertia 1093959.3575540395
Iteration 8, inertia 1091261.8972434495
Iteration 9, inertia 1087336.521086591
Iteration 10, inertia 1082357.7996503597
Iteration 11, inertia 1079089.9355448065
Iteration 12, inertia 1077805.950502848
Iteration 13, inertia 1077052.167387624
Iteration 14, inertia 1076374.3672525089
Iteration 15, inertia 1075710.3080849806
Iteration 16, inertia 1075017.7497480959
Iteration 17, inertia 1074434.2008408068
Iteration 18, inertia 1073975.671423709
Iteration 19, inertia 1073645.7618919485
Iteration 20, inertia 1073418.4069973608
Iteration 21, inertia 1073262.0329617548
Iteration 22, inertia 1073147.0415727037
Iteration 23, inertia 1073062.525556698
Iteration 24, inertia 1072992.9295552443
Iteration 25, inertia 1072932.226669553
Iteration 26, inertia 1072866.292928011
Iteration 27, inertia 1072793.9933869515
Iteration 28, inertia 1072710.6318784188
Iteration 29, inertia 1072594.1928294217
Iteration 30, inertia 1072434.3678153679
Iteration 31, inertia 1072188.4111979539
Iteration 32, inertia 1071837.470523644
Iteration 33, inertia 1071407.8583750606
Iteration 34, inertia 1070985.771902862
Iteration 35, inertia 1070615.7857233952
Iteration 36, inertia 1070315.133106174
Iteration 37, inertia 1070071.6954434349
Iteration 38, inertia 1069813.432080524
Iteration 39, inertia 1069513.1026847577
Iteration 40, inertia 1069154.5258114587
Iteration 41, inertia 1068720.6721533719
Iteration 42, inertia 1068218.6447184903
Iteration 43, inertia 1067578.4989955402
Iteration 44, inertia 1066774.3121690569
Iteration 45, inertia 1065768.4339077924
Iteration 46, inertia 1064614.8485870115
Iteration 47, inertia 1063583.8515457623
Iteration 48, inertia 1062913.9978372105
Iteration 49, inertia 1062643.9768895397
Iteration 50, inertia 1062536.2095102891
Iteration 51, inertia 1062480.1934463005
Iteration 52, inertia 1062442.7308576854
Iteration 53, inertia 1062421.9193580071
Iteration 54, inertia 1062407.4635885195
Iteration 55, inertia 1062396.355058509
Iteration 56, inertia 1062388.8980699282
Iteration 57, inertia 1062383.6123284558
Iteration 58, inertia 1062379.8840699268
Iteration 59, inertia 1062376.7716375433
Iteration 60, inertia 1062374.2867137413
Iteration 61, inertia 1062372.062373369
Converged at iteration 61: center shift 9.561298688633168e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1533481.9187964187
Iteration 1, inertia 1141022.7833333153
Iteration 2, inertia 1111155.4020968876
Iteration 3, inertia 1099516.2178008854
Iteration 4, inertia 1093500.488288347
Iteration 5, inertia 1090108.9627420371
Iteration 6, inertia 1087741.653824915
Iteration 7, inertia 1085755.184394873
Iteration 8, inertia 1083693.3415515418
Iteration 9, inertia 1081859.096344771
Iteration 10, inertia 1081227.4057550728
Iteration 11, inertia 1080686.6846978166
Iteration 12, inertia 1080131.350464464
Iteration 13, inertia 1079547.3679792145
Iteration 14, inertia 1078951.2425748568
Iteration 15, inertia 1078381.764592131
Iteration 16, inertia 1077824.6682510613
Iteration 17, inertia 1077261.1216450408
Iteration 18, inertia 1076671.9330196865
Iteration 19, inertia 1076111.6391608259
Iteration 20, inertia 1075593.556684215
Iteration 21, inertia 1075076.7548978366
Iteration 22, inertia 1074614.4708625488
Iteration 23, inertia 1074198.2815987836
Iteration 24, inertia 1073811.151690962
Iteration 25, inertia 1073489.2345411803
Iteration 26, inertia 1073216.5376026134
Iteration 27, inertia 1072997.4177629729
Iteration 28, inertia 1072837.4388617678
Iteration 29, inertia 1072720.314760919
Iteration 30, inertia 1072626.0907780563
Iteration 31, inertia 1072551.4392313287
Iteration 32, inertia 1072491.3405777218
Iteration 33, inertia 1072439.5309630686
Iteration 34, inertia 1072394.4293159195
Iteration 35, inertia 1072351.9268389586
Iteration 36, inertia 1072308.3199412385
Iteration 37, inertia 1072261.0380787032
Iteration 38, inertia 1072218.5762760725
Iteration 39, inertia 1072179.0849450931
Iteration 40, inertia 1072142.9535733769
Iteration 41, inertia 1072104.4283442306
Iteration 42, inertia 1072061.5394010097
Iteration 43, inertia 1072006.5554972412
Iteration 44, inertia 1071942.7896558691
Iteration 45, inertia 1071880.2072995566
Iteration 46, inertia 1071844.3775986482
Iteration 47, inertia 1071821.8696546457
Iteration 48, inertia 1071800.742329854
Iteration 49, inertia 1071782.065081264
Iteration 50, inertia 1071761.8704620854
Iteration 51, inertia 1071744.9171639706
Iteration 52, inertia 1071730.944002384
Iteration 53, inertia 1071718.896431847
Iteration 54, inertia 1071707.72482154
Iteration 55, inertia 1071697.0325495445
Iteration 56, inertia 1071686.204887337
Iteration 57, inertia 1071670.307996194
Iteration 58, inertia 1071651.5522898566
Iteration 59, inertia 1071635.9610689697
Iteration 60, inertia 1071619.7618995504
Iteration 61, inertia 1071603.462529547
Iteration 62, inertia 1071588.4260267755
Iteration 63, inertia 1071573.7711582186
Iteration 64, inertia 1071558.7130277792
Iteration 65, inertia 1071547.993367073
Iteration 66, inertia 1071539.4740874527
Iteration 67, inertia 1071530.4252345695
Iteration 68, inertia 1071520.6202985893
Iteration 69, inertia 1071513.1100882855
Iteration 70, inertia 1071505.071922007
Iteration 71, inertia 1071498.3739023365
Iteration 72, inertia 1071493.5422717195
Iteration 73, inertia 1071489.681235108
Iteration 74, inertia 1071487.4670391225
Converged at iteration 74: center shift 8.717316594747099e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1528040.2246529965
Iteration 1, inertia 1132014.441343659
Iteration 2, inertia 1096128.4818956906
Iteration 3, inertia 1083533.3974377345
Iteration 4, inertia 1078865.6156553838
Iteration 5, inertia 1076121.4486851885
Iteration 6, inertia 1073558.5157769932
Iteration 7, inertia 1071107.7220839143
Iteration 8, inertia 1069102.9029251868
Iteration 9, inertia 1067622.7586647086
Iteration 10, inertia 1066568.794179264
Iteration 11, inertia 1065848.4040802477
Iteration 12, inertia 1065343.9059031007
Iteration 13, inertia 1064945.5080586167
Iteration 14, inertia 1064595.9972997725
Iteration 15, inertia 1064243.0981610145
Iteration 16, inertia 1063878.4567679302
Iteration 17, inertia 1063518.894244183
Iteration 18, inertia 1063126.5469427484
Iteration 19, inertia 1062704.494770205
Iteration 20, inertia 1062232.4549200027
Iteration 21, inertia 1061759.4127827992
Iteration 22, inertia 1061296.355658263
Iteration 23, inertia 1060897.1701654126
Iteration 24, inertia 1060543.8760499202
Iteration 25, inertia 1060266.017233957
Iteration 26, inertia 1060046.9607272744
Iteration 27, inertia 1059871.68806692
Iteration 28, inertia 1059739.9966508104
Iteration 29, inertia 1059645.3738772452
Iteration 30, inertia 1059572.214740185
Iteration 31, inertia 1059520.1092372953
Iteration 32, inertia 1059476.8590250073
Iteration 33, inertia 1059438.98927601
Iteration 34, inertia 1059408.330542772
Iteration 35, inertia 1059378.964154699
Iteration 36, inertia 1059356.676373178
Iteration 37, inertia 1059339.0079398362
Iteration 38, inertia 1059322.599733904
Iteration 39, inertia 1059305.6591112644
Iteration 40, inertia 1059289.183936796
Iteration 41, inertia 1059270.5782940204
Iteration 42, inertia 1059256.244436124
Iteration 43, inertia 1059237.6365943574
Iteration 44, inertia 1059217.527012061
Iteration 45, inertia 1059200.159859976
Iteration 46, inertia 1059184.4427141547
Iteration 47, inertia 1059169.497724323
Iteration 48, inertia 1059156.0230105342
Iteration 49, inertia 1059144.7047525654
Iteration 50, inertia 1059135.0483952186
Iteration 51, inertia 1059125.2469544273
Iteration 52, inertia 1059116.1608971434
Iteration 53, inertia 1059106.096855189
Iteration 54, inertia 1059093.6162468693
Iteration 55, inertia 1059081.2509976227
Iteration 56, inertia 1059071.336415391
Iteration 57, inertia 1059063.7248753135
Iteration 58, inertia 1059056.5072381685
Iteration 59, inertia 1059047.4995729045
Iteration 60, inertia 1059037.7777436632
Iteration 61, inertia 1059030.2554825335
Iteration 62, inertia 1059025.1158830528
Iteration 63, inertia 1059021.0003963353
Iteration 64, inertia 1059016.605331854
Iteration 65, inertia 1059012.2818199464
Iteration 66, inertia 1059007.748622384
Iteration 67, inertia 1059003.1868528523
Iteration 68, inertia 1058998.866969727
Iteration 69, inertia 1058994.170736548
Iteration 70, inertia 1058989.3014590035
Iteration 71, inertia 1058984.815117142
Iteration 72, inertia 1058980.2867537728
Iteration 73, inertia 1058976.2158306516
Iteration 74, inertia 1058972.4788188383
Iteration 75, inertia 1058968.8729347764
Iteration 76, inertia 1058964.9579482079
Iteration 77, inertia 1058960.290779928
Iteration 78, inertia 1058954.2704991892
Iteration 79, inertia 1058947.5497043321
Iteration 80, inertia 1058941.9138010992
Iteration 81, inertia 1058937.622062571
Iteration 82, inertia 1058933.9666619385
Iteration 83, inertia 1058930.9071479084
Iteration 84, inertia 1058928.4332417247
Iteration 85, inertia 1058926.473769887
Converged at iteration 85: center shift 9.4595908616319e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1543090.5000349204
Iteration 1, inertia 1166324.3616256032
Iteration 2, inertia 1119224.5490564962
Iteration 3, inertia 1099799.5381588547
Iteration 4, inertia 1090897.1404651632
Iteration 5, inertia 1086576.1927144395
Iteration 6, inertia 1083438.8237490046
Iteration 7, inertia 1080391.611521363
Iteration 8, inertia 1077304.5908761937
Iteration 9, inertia 1074496.3241475555
Iteration 10, inertia 1072071.4839542503
Iteration 11, inertia 1069896.3185046273
Iteration 12, inertia 1067914.562606338
Iteration 13, inertia 1066318.316520494
Iteration 14, inertia 1065155.5177455682
Iteration 15, inertia 1064465.394332949
Iteration 16, inertia 1064087.9557392378
Iteration 17, inertia 1063895.3570493439
Iteration 18, inertia 1063798.9477325582
Iteration 19, inertia 1063748.1761253418
Iteration 20, inertia 1063714.4506418793
Iteration 21, inertia 1063692.7882838193
Iteration 22, inertia 1063676.089419221
Iteration 23, inertia 1063664.09702477
Iteration 24, inertia 1063655.030625566
Iteration 25, inertia 1063648.0774148665
Iteration 26, inertia 1063643.4748189265
Iteration 27, inertia 1063639.3800228618
Iteration 28, inertia 1063634.963167905
Iteration 29, inertia 1063631.3119743099
Iteration 30, inertia 1063628.6105679066
Iteration 31, inertia 1063626.602572974
Converged at iteration 31: center shift 9.386443328024657e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1527430.9989983155
Iteration 1, inertia 1189477.661161892
Iteration 2, inertia 1148667.4448779915
Iteration 3, inertia 1131622.7503587177
Iteration 4, inertia 1121059.359653391
Iteration 5, inertia 1108791.408601461
Iteration 6, inertia 1092582.094986319
Iteration 7, inertia 1082379.1128686229
Iteration 8, inertia 1076073.621133802
Iteration 9, inertia 1071595.0938703306
Iteration 10, inertia 1068583.5183418733
Iteration 11, inertia 1066425.8086418065
Iteration 12, inertia 1064202.5906458988
Iteration 13, inertia 1062854.871153981
Iteration 14, inertia 1062342.6678998582
Iteration 15, inertia 1062121.1253384447
Iteration 16, inertia 1062002.6283166695
Iteration 17, inertia 1061941.40448264
Iteration 18, inertia 1061907.29805096
Iteration 19, inertia 1061885.5922303603
Iteration 20, inertia 1061872.2726760267
Iteration 21, inertia 1061863.3048171166
Iteration 22, inertia 1061857.068581753
Iteration 23, inertia 1061851.6947927603
Iteration 24, inertia 1061847.484437047
Iteration 25, inertia 1061845.4281391136
Iteration 26, inertia 1061843.9845963155
Iteration 27, inertia 1061842.7092984377
Converged at iteration 27: center shift 6.317482042283155e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1530010.1767161565
Iteration 1, inertia 1159639.7293103684
Iteration 2, inertia 1112482.6460443856
Iteration 3, inertia 1093082.351264284
Iteration 4, inertia 1083981.2656860645
Iteration 5, inertia 1079032.6999509674
Iteration 6, inertia 1076208.6318160724
Iteration 7, inertia 1074385.7596911436
Iteration 8, inertia 1073195.3632660937
Iteration 9, inertia 1072076.6445564027
Iteration 10, inertia 1070853.6411840897
Iteration 11, inertia 1069680.7558374836
Iteration 12, inertia 1068722.5790395476
Iteration 13, inertia 1067988.3841193928
Iteration 14, inertia 1067356.2659693686
Iteration 15, inertia 1066712.180186866
Iteration 16, inertia 1066056.9153973889
Iteration 17, inertia 1065414.9024735999
Iteration 18, inertia 1064785.5843123326
Iteration 19, inertia 1064230.5987404934
Iteration 20, inertia 1063786.7603719388
Iteration 21, inertia 1063462.6357750278
Iteration 22, inertia 1063233.2777044037
Iteration 23, inertia 1063074.9347412293
Iteration 24, inertia 1062968.649961844
Iteration 25, inertia 1062884.2623089
Iteration 26, inertia 1062801.0268068856
Iteration 27, inertia 1062708.6831022594
Iteration 28, inertia 1062606.6605326962
Iteration 29, inertia 1062487.4745810644
Iteration 30, inertia 1062344.6313244142
Iteration 31, inertia 1062189.857764195
Iteration 32, inertia 1062034.5844359356
Iteration 33, inertia 1061885.7555336356
Iteration 34, inertia 1061770.172910868
Iteration 35, inertia 1061673.2146344283
Iteration 36, inertia 1061597.4049100603
Iteration 37, inertia 1061537.4300182697
Iteration 38, inertia 1061483.4165872629
Iteration 39, inertia 1061436.8619644928
Iteration 40, inertia 1061395.730660208
Iteration 41, inertia 1061361.6492275698
Iteration 42, inertia 1061334.5089811084
Iteration 43, inertia 1061309.9706417432
Iteration 44, inertia 1061285.9829691774
Iteration 45, inertia 1061266.2001872212
Iteration 46, inertia 1061251.5458898372
Iteration 47, inertia 1061241.1361962438
Iteration 48, inertia 1061232.0875043431
Iteration 49, inertia 1061225.7932694745
Iteration 50, inertia 1061219.086442478
Iteration 51, inertia 1061211.356667377
Iteration 52, inertia 1061204.0193982716
Iteration 53, inertia 1061196.2735246203
Iteration 54, inertia 1061189.9466036186
Iteration 55, inertia 1061184.9722401784
Iteration 56, inertia 1061180.7611597797
Iteration 57, inertia 1061177.7977453128
Iteration 58, inertia 1061175.6350618722
Iteration 59, inertia 1061173.6837396577
Iteration 60, inertia 1061171.8742801761
Iteration 61, inertia 1061169.728116058
Iteration 62, inertia 1061167.6811046973
Iteration 63, inertia 1061166.0375455136
Converged at iteration 63: center shift 9.567525712380937e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1485366.34937874
Iteration 1, inertia 1166914.313650583
Iteration 2, inertia 1109185.6074101632
Iteration 3, inertia 1089592.468896896
Iteration 4, inertia 1082071.5882838317
Iteration 5, inertia 1077532.6295956601
Iteration 6, inertia 1074092.8014361109
Iteration 7, inertia 1071168.9119014665
Iteration 8, inertia 1068851.0039151972
Iteration 9, inertia 1067066.053030216
Iteration 10, inertia 1065704.3627450417
Iteration 11, inertia 1064727.908979444
Iteration 12, inertia 1063929.7708738698
Iteration 13, inertia 1063092.0022536183
Iteration 14, inertia 1062244.8513246507
Iteration 15, inertia 1061365.7790582113
Iteration 16, inertia 1060559.2299659806
Iteration 17, inertia 1060005.3881050162
Iteration 18, inertia 1059667.8845229698
Iteration 19, inertia 1059451.7571624236
Iteration 20, inertia 1059313.6659550834
Iteration 21, inertia 1059219.4251809565
Iteration 22, inertia 1059159.4900256381
Iteration 23, inertia 1059106.7873514341
Iteration 24, inertia 1059055.1664184304
Iteration 25, inertia 1059000.2897709426
Iteration 26, inertia 1058964.6554665957
Iteration 27, inertia 1058946.478830107
Iteration 28, inertia 1058933.7910261266
Iteration 29, inertia 1058922.2496031793
Iteration 30, inertia 1058913.1677040963
Iteration 31, inertia 1058904.5710288961
Iteration 32, inertia 1058894.9261501804
Iteration 33, inertia 1058885.0855612298
Iteration 34, inertia 1058876.0873081826
Iteration 35, inertia 1058866.8290601566
Iteration 36, inertia 1058856.420964137
Iteration 37, inertia 1058844.40159226
Iteration 38, inertia 1058830.957432061
Iteration 39, inertia 1058816.2007536655
Iteration 40, inertia 1058802.2619843865
Iteration 41, inertia 1058788.8399013635
Iteration 42, inertia 1058775.0185238845
Iteration 43, inertia 1058761.9433659778
Iteration 44, inertia 1058749.8952934751
Iteration 45, inertia 1058736.4358992109
Iteration 46, inertia 1058721.7671516917
Iteration 47, inertia 1058705.438174019
Iteration 48, inertia 1058687.3477819543
Iteration 49, inertia 1058667.74306436
Iteration 50, inertia 1058647.462391039
Iteration 51, inertia 1058626.2776998058
Iteration 52, inertia 1058602.3496328378
Iteration 53, inertia 1058571.0571717173
Iteration 54, inertia 1058533.6220375071
Iteration 55, inertia 1058493.718234758
Iteration 56, inertia 1058450.9670890549
Iteration 57, inertia 1058411.8092976185
Iteration 58, inertia 1058372.0378341132
Iteration 59, inertia 1058331.5588077034
Iteration 60, inertia 1058293.9539806114
Iteration 61, inertia 1058262.6127494525
Iteration 62, inertia 1058233.307646366
Iteration 63, inertia 1058204.9140189707
Iteration 64, inertia 1058180.5723503446
Iteration 65, inertia 1058157.4309788363
Iteration 66, inertia 1058139.2944931553
Iteration 67, inertia 1058124.9595231672
Iteration 68, inertia 1058111.4891680724
Iteration 69, inertia 1058099.3015343037
Iteration 70, inertia 1058087.1314855064
Iteration 71, inertia 1058074.3424989572
Iteration 72, inertia 1058062.2126820935
Iteration 73, inertia 1058048.574945352
Iteration 74, inertia 1058032.1363788838
Iteration 75, inertia 1058014.9218950823
Iteration 76, inertia 1057999.021546376
Iteration 77, inertia 1057980.6384130116
Iteration 78, inertia 1057959.1088955405
Iteration 79, inertia 1057932.288432137
Iteration 80, inertia 1057898.7485262025
Iteration 81, inertia 1057859.0110090794
Iteration 82, inertia 1057817.3759197462
Iteration 83, inertia 1057775.0948727496
Iteration 84, inertia 1057736.7018026111
Iteration 85, inertia 1057697.586648922
Iteration 86, inertia 1057660.3143450823
Iteration 87, inertia 1057625.8905078045
Iteration 88, inertia 1057589.7207763102
Iteration 89, inertia 1057550.2113983117
Iteration 90, inertia 1057501.2912635861
Iteration 91, inertia 1057449.808344935
Iteration 92, inertia 1057394.494191321
Iteration 93, inertia 1057339.1789444939
Iteration 94, inertia 1057287.4364556733
Iteration 95, inertia 1057238.1434121171
Iteration 96, inertia 1057187.3680744208
Iteration 97, inertia 1057137.4038580717
Iteration 98, inertia 1057093.4916064262
Iteration 99, inertia 1057051.8725348955
Iteration 100, inertia 1057010.7370654652
Iteration 101, inertia 1056968.8927394166
Iteration 102, inertia 1056928.2504798614
Iteration 103, inertia 1056892.2883706912
Iteration 104, inertia 1056858.581822213
Iteration 105, inertia 1056829.7512072735
Iteration 106, inertia 1056803.3847363575
Iteration 107, inertia 1056782.0109399306
Iteration 108, inertia 1056764.6385593202
Iteration 109, inertia 1056749.464780469
Iteration 110, inertia 1056737.1231428802
Iteration 111, inertia 1056727.4188840124
Iteration 112, inertia 1056720.0695550372
Iteration 113, inertia 1056713.917640718
Iteration 114, inertia 1056708.5976807354
Iteration 115, inertia 1056705.2803011325
Iteration 116, inertia 1056701.9135425468
Iteration 117, inertia 1056697.8815472107
Iteration 118, inertia 1056693.8380570295
Iteration 119, inertia 1056689.9871340892
Iteration 120, inertia 1056686.0548176991
Iteration 121, inertia 1056682.48471731
Iteration 122, inertia 1056679.2074778152
Iteration 123, inertia 1056676.1694588363
Iteration 124, inertia 1056672.5157562606
Iteration 125, inertia 1056668.277051697
Iteration 126, inertia 1056664.1859588255
Iteration 127, inertia 1056660.9171111616
Iteration 128, inertia 1056657.8133843595
Iteration 129, inertia 1056654.7874933477
Iteration 130, inertia 1056651.9978181226
Iteration 131, inertia 1056649.683152547
Iteration 132, inertia 1056647.6377632297
Iteration 133, inertia 1056645.6947906385
Converged at iteration 133: center shift 9.970076741224624e-05 within tolerance 0.00010000000000000789.
Initialization complete
Iteration 0, inertia 1533726.2830152828
Iteration 1, inertia 1135317.3774418416
Iteration 2, inertia 1099930.2580672416
Iteration 3, inertia 1085748.9516291567
Iteration 4, inertia 1078421.0872950258
Iteration 5, inertia 1074224.463333758
Iteration 6, inertia 1071223.7175547474
Iteration 7, inertia 1068673.7846402095
Iteration 8, inertia 1066213.7900457538
Iteration 9, inertia 1064011.2861757297
Iteration 10, inertia 1062369.7206848466
Iteration 11, inertia 1061361.0495962086
Iteration 12, inertia 1060745.0512778722
Iteration 13, inertia 1060354.669498197
Iteration 14, inertia 1060090.1435792325
Iteration 15, inertia 1059892.8804547815
Iteration 16, inertia 1059736.049663461
Iteration 17, inertia 1059613.105757206
Iteration 18, inertia 1059519.1006844048
Iteration 19, inertia 1059447.8909466825
Iteration 20, inertia 1059396.200936344
Iteration 21, inertia 1059359.1100332565
Iteration 22, inertia 1059331.7554745025
Iteration 23, inertia 1059311.2234102066
Iteration 24, inertia 1059292.445693493
Iteration 25, inertia 1059277.1914786512
Iteration 26, inertia 1059264.9347930485
Iteration 27, inertia 1059255.817714924
Iteration 28, inertia 1059247.5494275303
Iteration 29, inertia 1059240.2534132998
Iteration 30, inertia 1059234.1783999188
Iteration 31, inertia 1059229.3602603008
Iteration 32, inertia 1059225.632075968
Iteration 33, inertia 1059222.8040312969
Iteration 34, inertia 1059220.101204206
Iteration 35, inertia 1059217.3366438097
Iteration 36, inertia 1059215.080626023
Iteration 37, inertia 1059213.1263108135
Converged at iteration 37: center shift 7.546514179991207e-05 within tolerance 0.00010000000000000789.
[Pipeline] ............ (step 2 of 2) Processing kmeans, total=  25.6s
In [20]:
spotify_data
Out[20]:
valence year acousticness artists danceability duration_ms energy explicit id instrumentalness key liveness loudness mode name popularity release_date speechiness tempo cluster_label
0 0.0594 1921 0.98200 ['Sergei Rachmaninoff', 'James Levine', 'Berli... 0.279 831667 0.211 0 4BJqT0PrAfrxzMOxytFOIz 0.878000 10 0.6650 -20.096 1 Piano Concerto No. 3 in D Minor, Op. 30: III. ... 4 1921 0.0366 80.954 12
1 0.9630 1921 0.73200 ['Dennis Day'] 0.819 180533 0.341 0 7xPhfUan2yNtyFG0cUWkt8 0.000000 7 0.1600 -12.441 1 Clancy Lowered the Boom 5 1921 0.4150 60.936 11
2 0.0394 1921 0.96100 ['KHP Kridhamardawa Karaton Ngayogyakarta Hadi... 0.328 500062 0.166 0 1o6I8BglA6ylDMrIELygv1 0.913000 3 0.1010 -14.850 1 Gati Bali 5 1921 0.0339 110.339 3
3 0.1650 1921 0.96700 ['Frank Parker'] 0.275 210000 0.309 0 3ftBPsC5vPBKxYSee08FDH 0.000028 5 0.3810 -9.316 1 Danny Boy 3 1921 0.0354 100.109 17
4 0.2530 1921 0.95700 ['Phil Regan'] 0.418 166693 0.193 0 4d6HGyGT8e121BsdKmw9v6 0.000002 3 0.2290 -10.096 1 When Irish Eyes Are Smiling 2 1921 0.0380 101.665 17
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
170648 0.6080 2020 0.08460 ['Anuel AA', 'Daddy Yankee', 'KAROL G', 'Ozuna... 0.786 301714 0.808 0 0KkIkfsLEJbrcIhYsCL7L5 0.000289 7 0.0822 -3.702 1 China 72 2020-05-29 0.0881 105.029 0
170649 0.7340 2020 0.20600 ['Ashnikko'] 0.717 150654 0.753 0 0OStKKAuXlxA0fMH54Qs6E 0.000000 7 0.1010 -6.020 1 Halloweenie III: Seven Days 68 2020-10-23 0.0605 137.936 4
170650 0.6370 2020 0.10100 ['MAMAMOO'] 0.634 211280 0.858 0 4BZXVFYCb76Q0Klojq4piV 0.000009 4 0.2580 -2.226 0 AYA 76 2020-11-03 0.0809 91.688 16
170651 0.1950 2020 0.00998 ['Eminem'] 0.671 337147 0.623 1 5SiZJoLXp3WOl3J4C8IK0d 0.000008 2 0.6430 -7.161 1 Darkness 70 2020-01-17 0.3080 75.055 14
170652 0.6420 2020 0.13200 ['KEVVO', 'J Balvin'] 0.856 189507 0.721 1 7HmnJHfs0BkFzX4x8j0hkl 0.004710 7 0.1820 -4.928 1 Billetes Azules (with J Balvin) 74 2020-10-16 0.1080 94.991 14

170653 rows × 20 columns

Visualizing the Song Clusters with PCA¶

In [21]:
from sklearn.decomposition import PCA
pca_pipeline = Pipeline([('scaler', StandardScaler()),
                        ('PCA', PCA(n_components=2))])
song_embedding = pca_pipeline.fit_transform(X)
projection = pd.DataFrame(columns=['x', 'y'], data=song_embedding)
projection['title'] = spotify_data['name']
projection['cluster'] = spotify_data['cluster_label']
In [22]:
import plotly.express as px
fig = px.scatter(projection, x='x', y='y', color='cluster',
                hover_data=['x', 'y', 'title'])
fig.show()
In [23]:
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
In [24]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    client_id='2a1633b8d0e94800b7a7707870dad747',
    client_secret='8e416de66f734433ac87282497c1c6b1'
))
In [25]:
def find_song(name, year):
    """
    Returns a dataframe with data for a song given the name and release year.
    Uses Spotify to fetch audio features and metadata for the specified song.
    """
    song_data = defaultdict()
    results = sp.search(q= 'track: {} year: {}'.format(name, year), limit=1)
    
    if not results['tracks']['items']:
        return None
    
    results = results['tracks']['items'][0]
    
    track_id = results['id']
    audio_features = sp.audio_features(track_id)[0]
    song_data['name'] = [name]
    song_data['year'] = [year]
    song_data['explicit'] = [int(results['explicit'])]
    song_data['duration_ms'] = [results['duration_ms']]
    song_data['popularity'] = [results['popularity']]
    
    for key, value in audio_features.items():
        song_data[key] = value
    
    return pd.DataFrame(song_data)
In [26]:
def flatten_dict_list(songs_list):
    flattened_dict = defaultdict()
    
    for key in songs_list[0].keys():
        flattened_dict[key] = []
        
    for dictionary in songs_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)
            
    return flattened_dict
In [27]:
def get_song_data(song, spotify_data):
    try:
        song_data = spotify_data[(spotify_data['name'] == song['name'])
                                 & (spotify_data['year'] == song['year'])].iloc[0]
    except IndexError:
        return find_song(song['name'], song['year'])
    return song_data
In [28]:
def get_mean_vector(songs_list, spotify_data):
    """
    Mean vector for a list of songs
    """
    song_vectors = []
    for song in songs_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
        song_vector = song_data[number_cols].values
        song_vectors.append(song_vector)
    song_matrix = np.array(list(song_vectors))
    return np.mean(song_matrix, axis=0)
In [29]:
from scipy.spatial.distance import cdist

def recommended_songs(songs_list, spotify_data, n_songs=10):
    """
    Recommends songs based on a list of previous songs that a user
    has listened to.
    """
    cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(songs_list)
    song_center = get_mean_vector(songs_list, spotify_data)
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    index = list(np.argsort(distances)[:, :n_songs][0])
    rec_songs = spotify_data.iloc[index]
    rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
    return rec_songs[cols].to_dict(orient='records')
In [30]:
recommended_songs([
    {'name': 'Come As You Are', 'year':1991},
    {'name': 'Smells Like Teen Spirit', 'year': 1991},
    {'name': 'Lithium', 'year': 1992},
    {'name': 'All Apologies', 'year': 1993},
    {'name': 'Stay Away', 'year': 1993}
],  spotify_data)
C:\Users\RS\AppData\Local\Temp\ipykernel_14464\3647570856.py:12: VisibleDeprecationWarning:

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.

D:\G\ss\lib\site-packages\sklearn\base.py:450: UserWarning:

X does not have valid feature names, but StandardScaler was fitted with feature names

Out[30]:
[{'name': 'Life is a Highway - From "Cars"',
  'year': 2009,
  'artists': "['Rascal Flatts']"},
 {'name': 'Of Wolf And Man', 'year': 1991, 'artists': "['Metallica']"},
 {'name': 'Somebody Like You', 'year': 2002, 'artists': "['Keith Urban']"},
 {'name': 'Corazón Mágico', 'year': 1995, 'artists': "['Los Fugitivos']"},
 {'name': 'Kayleigh', 'year': 1992, 'artists': "['Marillion']"},
 {'name': 'Little Secrets', 'year': 2009, 'artists': "['Passion Pit']"},
 {'name': 'No Excuses', 'year': 1994, 'artists': "['Alice In Chains']"},
 {'name': "Let's Get Rocked", 'year': 1992, 'artists': "['Def Leppard']"},
 {'name': 'If Today Was Your Last Day',
  'year': 2008,
  'artists': "['Nickelback']"},
 {'name': "Things I'll Never Say",
  'year': 2002,
  'artists': "['Avril Lavigne']"}]
In [31]:
recommended_songs([{'name':'Beat It', 'year': 1982},
                 {'name': 'Billie Jean', 'year': 1988},
                 {'name': 'Thriller', 'year': 1982}], spotify_data)
C:\Users\RS\AppData\Local\Temp\ipykernel_14464\3647570856.py:12: VisibleDeprecationWarning:

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.

D:\G\ss\lib\site-packages\sklearn\base.py:450: UserWarning:

X does not have valid feature names, but StandardScaler was fitted with feature names

Out[31]:
[{'name': 'Hot Legs', 'year': 1977, 'artists': "['Rod Stewart']"},
 {'name': 'Thriller - 2003 Edit',
  'year': 2003,
  'artists': "['Michael Jackson']"},
 {'name': "I Didn't Mean To Turn You On",
  'year': 1984,
  'artists': "['Cherrelle']"},
 {'name': 'Stars On 45 - Original Single Version',
  'year': 1981,
  'artists': "['Stars On 45']"},
 {'name': "Stars On '89 Remix - Radio Version",
  'year': 1984,
  'artists': "['Stars On 45']"},
 {'name': 'Take Me to the River - Live',
  'year': 1984,
  'artists': "['Talking Heads']"},
 {'name': 'Nothing Can Stop Us', 'year': 1992, 'artists': "['Saint Etienne']"}]
In [32]:
recommended_songs([{'name':'when you say nothing at all', 'year': 1999},], spotify_data)
D:\G\ss\lib\site-packages\sklearn\base.py:450: UserWarning:

X does not have valid feature names, but StandardScaler was fitted with feature names

Out[32]:
[{'name': 'Tell It Like It Is',
  'year': 1989,
  'artists': "['Wayne Toups', 'Zydecajun']"},
 {'name': 'In a Different Light', 'year': 1990, 'artists': "['Doug Stone']"},
 {'name': 'Born Country', 'year': 1991, 'artists': "['Alabama']"},
 {'name': 'What You Need', 'year': 2014, 'artists': "['Dan Sacks']"},
 {'name': 'Adelante Corazón', 'year': 1989, 'artists': "['Daniela Romo']"},
 {'name': 'Come With Me', 'year': 1995, 'artists': "['Shai']"},
 {'name': 'Shot Full Of Love', 'year': 1991, 'artists': "['Chris LeDoux']"},
 {'name': 'Happy, Happy Birthday Baby',
  'year': 1986,
  'artists': "['Ronnie Milsap']"},
 {'name': 'Too Busy Being In Love', 'year': 1990, 'artists': "['Doug Stone']"},
 {'name': 'Por Una Mujer Bonita',
  'year': 1998,
  'artists': "['Miguel Y Miguel']"}]
In [33]:
recommended_songs([{'name':'rock you like a hurricane', 'year': 1984},], spotify_data)
D:\G\ss\lib\site-packages\sklearn\base.py:450: UserWarning:

X does not have valid feature names, but StandardScaler was fitted with feature names

Out[33]:
[{'name': 'Rock You Like A Hurricane',
  'year': 1984,
  'artists': "['Scorpions']"},
 {'name': 'Barracuda', 'year': 1977, 'artists': "['Heart']"},
 {'name': 'While My Guitar Gently Weeps - Remastered 2009',
  'year': 1968,
  'artists': "['The Beatles']"},
 {'name': 'No Easy Way Out - From "Rocky IV" Soundtrack',
  'year': 1985,
  'artists': "['Robert Tepper']"},
 {'name': 'Youth Gone Wild', 'year': 1989, 'artists': "['Skid Row']"},
 {'name': 'Woodstock',
  'year': 1970,
  'artists': "['Crosby, Stills, Nash & Young']"},
 {'name': 'Careless Whisper', 'year': 1998, 'artists': "['George Michael']"},
 {'name': 'Jesus Is Just Alright with Me - 2007 Remaster',
  'year': 1976,
  'artists': "['The Doobie Brothers']"},
 {'name': 'No Easy Way Out - From "Rocky IV" Soundtrack',
  'year': 1986,
  'artists': "['Robert Tepper']"},
 {'name': 'Scatman (ski-ba-bop-ba-dop-bop)',
  'year': 1995,
  'artists': "['Scatman John']"}]
In [36]:
recommended_songs([{'name':'chemical. post malone', 'year': 2023},], spotify_data)
D:\G\ss\lib\site-packages\sklearn\base.py:450: UserWarning:

X does not have valid feature names, but StandardScaler was fitted with feature names

Out[36]:
[{'name': 'bloody valentine',
  'year': 2020,
  'artists': "['Machine Gun Kelly']"},
 {'name': 'bloody valentine',
  'year': 2020,
  'artists': "['Machine Gun Kelly']"},
 {'name': 'bad idea!', 'year': 2019, 'artists': "['girl in red']"},
 {'name': 'UCLA', 'year': 2018, 'artists': "['RL Grime', '24hrs']"},
 {'name': 'Who Do You Love',
  'year': 2019,
  'artists': "['The Chainsmokers', '5 Seconds of Summer']"},
 {'name': 'Legends', 'year': 2018, 'artists': "['Juice WRLD']"},
 {'name': 'Cry Alone', 'year': 2018, 'artists': "['Lil Peep']"},
 {'name': 'Feels Great (feat. Fetty Wap & CVBZ)',
  'year': 2017,
  'artists': "['Cheat Codes', 'Fetty Wap', 'CVBZ']"},
 {'name': 'Nightmares (feat. Lil Skies)',
  'year': 2019,
  'artists': "['Yung Pinch', 'Lil Skies']"},
 {'name': 'Man Of The Year', 'year': 2020, 'artists': "['Juice WRLD']"}]